Workflow

This pipeline analyses a Lemur TB sample sequenced on Nanopore. The repository for the pipeline can be found at <https://github.com/mbhall88/LemurPaper>.

Click the nodes to obtain details about each step.

AMR Prediction

Distance

Lineage

Quality Control

Statistics

If the workflow has been executed on a cluster or in the cloud, runtimes include the time spent waiting in the queue.

Configuration

Configuration files
File Code
config.yaml
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# Directory holding the fast5 files (relative path).
fast5_dir: 'fast5/'
# the basecalling model config
model_config: 'dna_r9.4.1_450bps_hac_prom.cfg'
# NOTE(review): absolute, site-specific path; it must exist on the host that
# runs the workflow.
decontam_db: '/hps/nobackup/research/zi/projects/tech_wars/data/QC/decontam_db/'
# Container image URIs, keyed by tool name. Every image carries an explicit
# version tag; `guppy` uses a library:// URI, the rest use docker:// URIs.
containers:
  guppy: 'library://mbhall88/default/guppy-gpu:3.4.5'
  conda: 'docker://continuumio/miniconda3:4.7.12'
  pycoqc: 'docker://quay.io/biocontainers/pycoqc:2.5.0.23--py_0'
  krona: 'docker://quay.io/biocontainers/krona:2.7--pl526_2'
  mykrobe: 'docker://quay.io/biocontainers/mykrobe:0.8.2--py36h1c69254_1'
  snp-dists: 'docker://quay.io/biocontainers/snp-dists:0.7.0--hed695b0_0'
  fasttree: 'docker://quay.io/biocontainers/fasttree:2.1.10--h516909a_4'
  bedtools: 'docker://quay.io/biocontainers/bedtools:2.29.2--hc088bd4_0'
# Relative paths to per-step environment files under envs/.
# NOTE(review): presumably conda environment definitions; confirm against the
# workflow rules that read them.
envs:
  aln_tools: 'envs/aln_tools.yaml'
  filter_reads: 'envs/filter_reads.yaml'
  rg: 'envs/rg.yaml'
  generate_krona_input: 'envs/generate_krona_input.yaml'
  filter_snps: 'envs/filter_snps.yaml'
  assign_lineages: 'envs/assign_lineages.yaml'
  consensus: 'envs/consensus.yaml'
  plot_distance_matrix: 'envs/plot_distance_matrix.yaml'
  dotplot: 'envs/dotplot.yaml'
# Relative paths to the Python scripts under scripts/, keyed by step name.
# Two keys do not match their file names: `filter_snps` points at
# apply_filters.py and `dotplot` points at distance_dot_plot.py.
scripts:
  filter_reads: 'scripts/filter_reads.py'
  generate_krona_input: 'scripts/generate_krona_input.py'
  filter_snps: 'scripts/apply_filters.py'
  assign_lineages: 'scripts/assign_lineages.py'
  consensus: 'scripts/consensus.py'
  plot_distance_matrix: 'scripts/plot_distance_matrix.py'
  dotplot: 'scripts/distance_dot_plot.py'
# Relative paths to reStructuredText files under report/, one per key.
# NOTE(review): presumably used as captions for the matching report items;
# confirm against the workflow rules.
captions:
  pycoqc: 'report/pycoqc.rst'
  krona: 'report/krona.rst'
  mykrobe: 'report/mykrobe.rst'
  lineage: 'report/lineage.rst'
  distance_matrix: 'report/distance_matrix.rst'
  dotplot: 'report/dotplot.rst'
# NOTE(review): presumably the reference genome length in base pairs; confirm.
genome_size: 4411532
# CSV panel of lineage-defining variants.
lineage_panel: 'resources/snps_for_typing.csv'
# the name given to samples with no variants in the panel of lineage-defining
# variants
default_lineage: 'unknown'
# the position in the lineage panel that defines the lineage of H37Rv
ref_lineage_position: 1692141
# taken from https://www.ncbi.nlm.nih.gov/assembly/GCA_004024665.1/
lemur_assembly_url: 'ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/004/024/665/GCA_004024665.1_LemCat_v1_BIUU/GCA_004024665.1_LemCat_v1_BIUU_genomic.fna.gz'
# H37Rv reference resources: the genome FASTA and a BED mask.
# NOTE(review): how the mask is applied is not visible here; check the
# workflow rules.
h37rv:
  genome: 'resources/h37rv.fa'
  mask: 'resources/compass-mask.bed'
# filters for nanopore bcftools calls
# Each key is a min_/max_ threshold for one metric.
# NOTE(review): bqb/mqb/rpb/sgb/vdb presumably map to the bcftools INFO
# annotations of the same names; confirm against scripts/apply_filters.py.
# NOTE(review): min_depth and max_depth are both 0; whether 0 disables a
# filter is not visible from this file.
filters:
  min_depth: 0
  max_depth: 0
  min_qual: 60
  min_strand_bias: 0
  min_bqb: 0
  min_mqb: 0
  min_rpb: 0.05
  max_sgb: -0.5
  min_vdb: 0.002
# NOTE(review): absolute, site-specific path; it must exist on the host that
# runs the workflow.
other_consensus_dir: '/hps/nobackup/research/zi/projects/tech_wars/analysis/baseline_variants/consensus/'
# Sample names of the additional consensus sequences.
# NOTE(review): presumably resolved relative to other_consensus_dir; confirm.
other_consensuses:
  - 'mada_116'
  - 'mada_134'
  - 'mada_135'
  - 'mada_1-30'
  - 'mada_1-17'
  - 'mada_141'
  - 'mada_121'
  - 'mada_111'

Loading...